discretewqLook at the sampling effort for the various WQ parameters from discretewq
to be included in the long-term WQ publication.
# Load packages
library(tidyverse)
library(lubridate)
library(hms)
library(scales)
library(discretewq)
library(deltamapr)
library(sf)
library(leaflet)
library(dtplyr)
library(here)
# Check if we are in the correct working directory
i_am("Sampling_Effort_discretewq.Rmd")
## here() starts at C:/Repositories/04_IEP_Org/WQ-LT-Publication
# Run session info to display package versions
devtools::session_info()
## ─ Session info ───────────────────────────────────────────────────────────────
## setting value
## version R version 4.2.1 (2022-06-23 ucrt)
## os Windows 10 x64 (build 19044)
## system x86_64, mingw32
## ui RTerm
## language (EN)
## collate English_United States.utf8
## ctype English_United States.utf8
## tz America/Los_Angeles
## date 2022-08-23
## pandoc 2.18 @ C:/Program Files/RStudio/bin/quarto/bin/tools/ (via rmarkdown)
##
## ─ Packages ───────────────────────────────────────────────────────────────────
## package * version date (UTC) lib source
## assertthat 0.2.1 2019-03-21 [1] CRAN (R 4.2.1)
## backports 1.4.1 2021-12-13 [1] CRAN (R 4.2.0)
## broom 1.0.0 2022-07-01 [1] CRAN (R 4.2.1)
## bslib 0.4.0 2022-07-16 [1] CRAN (R 4.2.1)
## cachem 1.0.6 2021-08-19 [1] CRAN (R 4.2.1)
## callr 3.7.1 2022-07-13 [1] CRAN (R 4.2.1)
## cellranger 1.1.0 2016-07-27 [1] CRAN (R 4.2.1)
## class 7.3-20 2022-01-16 [2] CRAN (R 4.2.1)
## classInt 0.4-7 2022-06-10 [1] CRAN (R 4.2.1)
## cli 3.3.0 2022-04-25 [1] CRAN (R 4.2.1)
## colorspace 2.0-3 2022-02-21 [1] CRAN (R 4.2.1)
## crayon 1.5.1 2022-03-26 [1] CRAN (R 4.2.1)
## crosstalk 1.2.0 2021-11-04 [1] CRAN (R 4.2.1)
## data.table 1.14.2 2021-09-27 [1] CRAN (R 4.2.1)
## DBI 1.1.3 2022-06-18 [1] CRAN (R 4.2.1)
## dbplyr 2.2.1 2022-06-27 [1] CRAN (R 4.2.1)
## deltamapr * 1.0.0 2021-06-18 [1] Github (InteragencyEcologicalProgram/deltamapr@d0a6f9c)
## devtools 2.4.4 2022-07-20 [1] CRAN (R 4.2.1)
## digest 0.6.29 2021-12-01 [1] CRAN (R 4.2.1)
## discretewq * 2.3.2 2022-08-18 [1] local
## dplyr * 1.0.9 2022-04-28 [1] CRAN (R 4.2.1)
## dtplyr * 1.2.1 2022-01-19 [1] CRAN (R 4.2.1)
## e1071 1.7-11 2022-06-07 [1] CRAN (R 4.2.1)
## ellipsis 0.3.2 2021-04-29 [1] CRAN (R 4.2.1)
## evaluate 0.15 2022-02-18 [1] CRAN (R 4.2.1)
## fansi 1.0.3 2022-03-24 [1] CRAN (R 4.2.1)
## fastmap 1.1.0 2021-01-25 [1] CRAN (R 4.2.1)
## forcats * 0.5.1 2021-01-27 [1] CRAN (R 4.2.1)
## fs 1.5.2 2021-12-08 [1] CRAN (R 4.2.1)
## gargle 1.2.0 2021-07-02 [1] CRAN (R 4.2.1)
## generics 0.1.3 2022-07-05 [1] CRAN (R 4.2.1)
## ggplot2 * 3.3.6 2022-05-03 [1] CRAN (R 4.2.1)
## glue 1.6.2 2022-02-24 [1] CRAN (R 4.2.1)
## googledrive 2.0.0 2021-07-08 [1] CRAN (R 4.2.1)
## googlesheets4 1.0.0 2021-07-21 [1] CRAN (R 4.2.1)
## gtable 0.3.0 2019-03-25 [1] CRAN (R 4.2.1)
## haven 2.5.0 2022-04-15 [1] CRAN (R 4.2.1)
## here * 1.0.1 2020-12-13 [1] CRAN (R 4.2.1)
## hms * 1.1.1 2021-09-26 [1] CRAN (R 4.2.1)
## htmltools 0.5.3 2022-07-18 [1] CRAN (R 4.2.1)
## htmlwidgets 1.5.4 2021-09-08 [1] CRAN (R 4.2.1)
## httpuv 1.6.5 2022-01-05 [1] CRAN (R 4.2.1)
## httr 1.4.3 2022-05-04 [1] CRAN (R 4.2.1)
## jquerylib 0.1.4 2021-04-26 [1] CRAN (R 4.2.1)
## jsonlite 1.8.0 2022-02-22 [1] CRAN (R 4.2.1)
## KernSmooth 2.23-20 2021-05-03 [2] CRAN (R 4.2.1)
## knitr 1.39 2022-04-26 [1] CRAN (R 4.2.1)
## later 1.3.0 2021-08-18 [1] CRAN (R 4.2.1)
## leaflet * 2.1.1 2022-03-23 [1] CRAN (R 4.2.1)
## lifecycle 1.0.1 2021-09-24 [1] CRAN (R 4.2.1)
## lubridate * 1.8.0 2021-10-07 [1] CRAN (R 4.2.1)
## magrittr 2.0.3 2022-03-30 [1] CRAN (R 4.2.1)
## memoise 2.0.1 2021-11-26 [1] CRAN (R 4.2.1)
## mime 0.12 2021-09-28 [1] CRAN (R 4.2.0)
## miniUI 0.1.1.1 2018-05-18 [1] CRAN (R 4.2.1)
## modelr 0.1.8 2020-05-19 [1] CRAN (R 4.2.1)
## munsell 0.5.0 2018-06-12 [1] CRAN (R 4.2.1)
## pillar 1.8.0 2022-07-18 [1] CRAN (R 4.2.1)
## pkgbuild 1.3.1 2021-12-20 [1] CRAN (R 4.2.1)
## pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.2.1)
## pkgload 1.3.0 2022-06-27 [1] CRAN (R 4.2.1)
## prettyunits 1.1.1 2020-01-24 [1] CRAN (R 4.2.1)
## processx 3.7.0 2022-07-07 [1] CRAN (R 4.2.1)
## profvis 0.3.7 2020-11-02 [1] CRAN (R 4.2.1)
## promises 1.2.0.1 2021-02-11 [1] CRAN (R 4.2.1)
## proxy 0.4-27 2022-06-09 [1] CRAN (R 4.2.1)
## ps 1.7.1 2022-06-18 [1] CRAN (R 4.2.1)
## purrr * 0.3.4 2020-04-17 [1] CRAN (R 4.2.1)
## R6 2.5.1 2021-08-19 [1] CRAN (R 4.2.1)
## Rcpp 1.0.9 2022-07-08 [1] CRAN (R 4.2.1)
## readr * 2.1.2 2022-01-30 [1] CRAN (R 4.2.1)
## readxl 1.4.0 2022-03-28 [1] CRAN (R 4.2.1)
## remotes 2.4.2 2021-11-30 [1] CRAN (R 4.2.1)
## reprex 2.0.1 2021-08-05 [1] CRAN (R 4.2.1)
## rlang 1.0.4 2022-07-12 [1] CRAN (R 4.2.1)
## rmarkdown 2.14 2022-04-25 [1] CRAN (R 4.2.1)
## rprojroot 2.0.3 2022-04-02 [1] CRAN (R 4.2.1)
## rstudioapi 0.13 2020-11-12 [1] CRAN (R 4.2.1)
## rvest 1.0.2 2021-10-16 [1] CRAN (R 4.2.1)
## sass 0.4.2 2022-07-16 [1] CRAN (R 4.2.1)
## scales * 1.2.0 2022-04-13 [1] CRAN (R 4.2.1)
## sessioninfo 1.2.2 2021-12-06 [1] CRAN (R 4.2.1)
## sf * 1.0-8 2022-07-14 [1] CRAN (R 4.2.1)
## shiny 1.7.2 2022-07-19 [1] CRAN (R 4.2.1)
## stringi 1.7.8 2022-07-11 [1] CRAN (R 4.2.1)
## stringr * 1.4.0 2019-02-10 [1] CRAN (R 4.2.1)
## tibble * 3.1.8 2022-07-22 [1] CRAN (R 4.2.1)
## tidyr * 1.2.0 2022-02-01 [1] CRAN (R 4.2.1)
## tidyselect 1.1.2 2022-02-21 [1] CRAN (R 4.2.1)
## tidyverse * 1.3.2 2022-07-18 [1] CRAN (R 4.2.1)
## tzdb 0.3.0 2022-03-28 [1] CRAN (R 4.2.1)
## units 0.8-0 2022-02-05 [1] CRAN (R 4.2.1)
## urlchecker 1.0.1 2021-11-30 [1] CRAN (R 4.2.1)
## usethis 2.1.6 2022-05-25 [1] CRAN (R 4.2.1)
## utf8 1.2.2 2021-07-24 [1] CRAN (R 4.2.1)
## vctrs 0.4.1 2022-04-13 [1] CRAN (R 4.2.1)
## withr 2.5.0 2022-03-03 [1] CRAN (R 4.2.1)
## xfun 0.31 2022-05-10 [1] CRAN (R 4.2.1)
## xml2 1.3.3 2021-11-30 [1] CRAN (R 4.2.1)
## xtable 1.8-4 2019-04-21 [1] CRAN (R 4.2.1)
## yaml 2.3.5 2022-02-21 [1] CRAN (R 4.2.1)
##
## [1] C:/R/win-library/4.2
## [2] C:/Program Files/R/R-4.2.1/library
##
## ──────────────────────────────────────────────────────────────────────────────
# Pull in and prepare the WQ data from discretewq
df_dwq <-
wq(
Sources = c(
"EMP",
"STN",
"FMWT",
"EDSM",
"DJFMP",
"SDO",
"SKT",
"SLS",
"20mm",
"Suisun",
"Baystudy",
"USBR",
"USGS_SFBS",
"YBFMP",
"USGS_CAWSC"
)
) %>%
transmute(
Source,
Station,
Latitude,
Longitude,
Date = date(Date),
# Convert Datetime to PST
Datetime = with_tz(Datetime, tzone = "Etc/GMT+8"),
Temperature,
Salinity,
Secchi,
Chlorophyll,
DissAmmonia,
DissNitrateNitrite,
DissOrthophos
) %>%
filter(
!if_all(
c(Temperature, Salinity, Secchi, Chlorophyll, DissAmmonia, DissNitrateNitrite, DissOrthophos),
is.na
)
)
Now that we’ve been making updates to the WQ data in the discretewq
package, let’s take a look at which surveys we can use for the
long-term WQ publication. First, we’ll look at the temporal scale of all
of the surveys available.
df_dwq %>%
group_by(Source) %>%
summarize(min_date = min(Date), max_date = max(Date)) %>%
arrange(min_date)
## # A tibble: 15 × 3
## Source min_date max_date
## <chr> <date> <date>
## 1 FMWT 1967-09-12 2021-12-16
## 2 USGS_SFBS 1969-04-10 2022-05-09
## 3 STN 1969-07-05 2021-08-19
## 4 USGS_CAWSC 1971-03-02 2021-03-10
## 5 EMP 1975-01-07 2021-12-16
## 6 DJFMP 1976-05-13 2021-12-29
## 7 Suisun 1979-05-16 2021-08-25
## 8 Baystudy 1980-01-23 2020-12-01
## 9 20mm 1995-04-24 2021-07-16
## 10 SDO 1997-08-04 2018-11-08
## 11 YBFMP 1998-01-19 2021-12-30
## 12 SKT 2002-01-07 2021-04-29
## 13 SLS 2009-01-05 2021-03-17
## 14 USBR 2012-05-08 2019-10-22
## 15 EDSM 2016-12-15 2021-11-26
Overall, for all parameters, it looks like FMWT, USGS_SFBS, STN, USGS_CAWSC, EMP, DJFMP, Suisun, and Baystudy have adequate temporal coverage for the long-term analysis. Now let’s take a look at the spatial coverage of these surveys.
# Only include surveys with adequate temporal coverage
df_dwq_lt <- df_dwq %>%
filter(Source %in% c("FMWT", "USGS_SFBS", "STN", "USGS_CAWSC", "EMP", "DJFMP", "Suisun", "Baystudy"))
# Pull out station coordinates and convert to sf object
sf_dwq_lt <- df_dwq_lt %>%
distinct(Source, Station, Latitude, Longitude) %>%
drop_na(Latitude, Longitude) %>%
st_as_sf(coords = c("Longitude", "Latitude"), crs = 4326) %>%
st_transform(st_crs(R_EDSM_Subregions_Mahardja_FLOAT))
# Load Delta Shapefile from Brian and only keep SubRegions east of Carquinez Straight
sf_delta <- R_EDSM_Subregions_Mahardja_FLOAT %>%
filter(
!SubRegion %in% c(
"Carquinez Strait",
"Lower Napa River",
"San Francisco Bay",
"San Pablo Bay",
"South Bay",
"Upper Napa River"
)
) %>%
select(SubRegion)
# Plot all stations over the SubRegions
ggplot() +
geom_sf(data = sf_delta, fill = "green", alpha = 0.5) +
geom_sf(data = sf_dwq_lt) +
theme_bw()
# Assign SubRegions to the stations
sf_dwq_lt_reg <- sf_dwq_lt %>%
st_join(sf_delta, join = st_intersects) %>%
filter(!is.na(SubRegion))
# Remove SubRegions without stations from sf_delta
sf_delta_c <- sf_delta %>% filter(SubRegion %in% unique(sf_dwq_lt_reg$SubRegion))
# Plot all stations over the SubRegions again - with limited ranges
ggplot() +
geom_sf(data = sf_delta_c, fill = "blue", alpha = 0.5) +
geom_sf(data = sf_dwq_lt_reg) +
theme_bw()
All but one of the SubRegions east of Carquinez Straight has at least one station from a long-term survey within it. Now let’s take a closer look at the temporal data coverage for each Station and parameter.
# Prepare long-term data from discretewq
df_dwq_lt_c <- df_dwq_lt %>%
# Remove records without latitude-longitude coordinates
drop_na(Latitude, Longitude) %>%
# Convert to sf object
st_as_sf(coords = c("Longitude", "Latitude"), crs = 4326, remove = FALSE) %>%
# Change to crs of sf_delta_c
st_transform(crs = st_crs(sf_delta_c)) %>%
# Add subregions
st_join(sf_delta_c, join = st_intersects) %>%
# Remove any data outside our subregions of interest
filter(!is.na(SubRegion)) %>%
# Drop sf geometry column since it's no longer needed
st_drop_geometry() %>%
# Add variables for adjusted calendar year, month, and season
# Adjusted calendar year: December-November, with December of the previous calendar year
# included with the following year
mutate(
Month = month(Date),
YearAdj = if_else(Month == 12, year(Date) + 1, year(Date)),
Season = case_when(
Month %in% 3:5 ~ "Spring",
Month %in% 6:8 ~ "Summer",
Month %in% 9:11 ~ "Fall",
Month %in% c(12, 1, 2) ~ "Winter"
)
) %>%
# Restrict data to 1975-2021
filter(YearAdj %in% 1975:2021)
# Prepare data separately for each parameter
prep_samp_effort <- function(df, param) {
# Remove any NA values in parameter of interest
df_param <- df %>% drop_na(.data[[param]])
# Look for any instances when more than 1 data point was collected at a station-day
df_dups <- df_param %>%
count(Source, Station, Date) %>%
filter(n > 1) %>%
select(-n)
# Fix duplicates
df_dups_fixed <- df_param %>%
inner_join(df_dups, by = c("Source", "Station", "Date")) %>%
drop_na(Datetime) %>%
mutate(
# Create variable for time
Time = as_hms(Datetime),
# Calculate difference from noon for each data point for later filtering
Noon_diff = abs(hms(hours = 12) - Time)
) %>%
# Use dtplyr to speed up operations
lazy_dt() %>%
group_by(Station, Date) %>%
# Select only 1 data point per station and date, choose data closest to noon
filter(Noon_diff == min(Noon_diff)) %>%
# When points are equidistant from noon, select earlier point
filter(Time == min(Time)) %>%
ungroup() %>%
# End dtplyr operation
as_tibble() %>%
select(-c(Time, Noon_diff))
# Add back fixed duplicates and format data frame
df_param %>%
anti_join(df_dups, by = c("Source", "Station", "Date")) %>%
bind_rows(df_dups_fixed) %>%
select(
Source,
Station,
Latitude,
Longitude,
SubRegion,
YearAdj,
Month,
Season,
Date,
Datetime,
.data[[param]]
)
}
# Create a nested data frame to run functions on
ndf_dwq_lt <-
tibble(
Parameter = c(
"Temperature",
"Salinity",
"Secchi",
"DissAmmonia",
"DissNitrateNitrite",
"DissOrthophos",
"Chlorophyll"
),
df_data = rep(list(df_dwq_lt_c), 7)
) %>%
# Prepare data for each Parameter
mutate(df_data = map2(df_data, Parameter, .f = prep_samp_effort))
# Plot function
plot_samp_effort_sta <- function(df) {
df %>%
count(Station, YearAdj, name = "num_samples") %>%
mutate(Station = fct_rev(factor(Station))) %>%
ggplot(aes(x = YearAdj, y = Station, fill = num_samples)) +
geom_tile() +
scale_x_continuous(
limits = c(1974, 2022),
breaks = breaks_pretty(20),
expand = expansion()
) +
scale_fill_viridis_c(name = "Number of Samples") +
theme_bw() +
theme(
axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
legend.position = "top"
)
}
# Create sampling effort by station plots for each Parameter and Source
ndf_dwq_lt_se_sta_plt <- ndf_dwq_lt %>%
mutate(
df_data = map(df_data, ~ nest(.x, df_data2 = -Source)),
df_data = modify_depth(df_data, 1, ~ mutate(.x, plt = map(df_data2, plot_samp_effort_sta)))
)
Not all of the stations from the long-term surveys were sampled consistently from 1975-2021. We need to make sure that we are only using SubRegions that have adequate sample coverage in our analyses. We’ll start with removing the obvious survey-parameter combinations that were only sparsely sampled.
DJFMP only sampled salinity for the past three years, so we won’t include this survey in the salinity analysis. For the USGS-CAWSC stations, only station 11447650 (Sacramento River at Freeport) was sampled on a long-term basis for Dissolved Ammonia, Nitrate+Nitrite, and Orthophosphate. Also, Chlorophyll wasn’t sampled consistently at any of the USGS-CAWSC stations. We’ll exclude this data from our data set before continuing.
ndf_dwq_lt <- ndf_dwq_lt %>%
mutate(
df_data_filt = case_when(
Parameter == "Salinity" ~ modify_depth(df_data, 1, ~ filter(.x, Source != "DJFMP")),
str_detect(Parameter, "^Diss") ~ modify_depth(
df_data, 1, ~ filter(.x, !(Source == "USGS_CAWSC" & Station != "USGS-11447650"))),
Parameter == "Chlorophyll" ~ modify_depth(df_data, 1, ~ filter(.x, Source != "USGS_CAWSC")),
TRUE ~ df_data
)
)
Some of the stations from the Suisun Marsh survey are located in small backwater channels and dead-end sloughs which represent a much different habitat than the sampling locations from the other surveys which tend to be in larger, open water channel habitat. We’ll take a closer look at the Suisun Marsh survey, and only include stations that are in the larger channels to be consistent with the other surveys.
# Pull out stations that are within the SubRegions containing the Suisun Marsh survey -
# Water temperature has a complete list of all stations collected by the Suisun
# Marsh survey, so we'll go with that
suisun_subreg <- ndf_dwq_lt$df_data_filt[[1]] %>%
filter(Source == "Suisun") %>%
distinct(SubRegion) %>%
pull(SubRegion)
sf_suisun_sta <- ndf_dwq_lt$df_data_filt[[1]] %>%
filter(SubRegion %in% suisun_subreg) %>%
distinct(Source, Station, Latitude, Longitude) %>%
# Convert to sf object
st_as_sf(coords = c("Longitude", "Latitude"), crs = 4326)
# Define color palette for Surveys
color_pal_survey <- colorFactor(palette = "viridis", domain = sf_suisun_sta$Source)
# Create map using leaflet
leaflet() %>%
addTiles() %>%
addCircleMarkers(
data = sf_suisun_sta,
radius = 5,
fillColor = ~color_pal_survey(Source),
fillOpacity = 0.8,
weight = 0.5,
color = "black",
opacity = 1,
label = paste0("Survey: ", sf_suisun_sta$Source, ", Station: ", sf_suisun_sta$Station)
) %>%
addLegend(
position = "topright",
pal = color_pal_survey,
values = sf_suisun_sta$Source,
title = "Survey:"
)
We’ll keep the stations located in Suisun and Montezuma Sloughs from the Suisun Marsh survey, since they seem to be in the larger channels in the area.
ndf_dwq_lt2 <- ndf_dwq_lt %>%
mutate(
df_data_filt = map(
df_data_filt,
~ filter(.x, !(Source == "Suisun" & !str_detect(Station, "^SU|^MZ")))
)
)
Before we start removing SubRegions and stations, let’s take a look at the sampling effort for each SubRegion and parameter combination.
# Plot function
plot_samp_effort_subreg <- function(df) {
# Create a vector of all possible SubRegions
subreg_all <- sf_delta %>% distinct(SubRegion) %>% pull(SubRegion)
# Create plot
df %>%
count(SubRegion, YearAdj, name = "num_samples") %>%
mutate(SubRegion = fct_rev(factor(SubRegion, levels = subreg_all))) %>%
ggplot(aes(x = YearAdj, y = SubRegion, fill = num_samples)) +
geom_tile() +
scale_x_continuous(
limits = c(1974, 2022),
breaks = breaks_pretty(20),
expand = expansion()
) +
scale_y_discrete(drop = FALSE) +
scale_fill_viridis_c(name = "Number of Samples") +
theme_bw() +
theme(
axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
legend.position = "top"
)
}
# Create sampling effort by SubRegion plots for each Parameter
ndf_dwq_lt_se_subreg_plt <- ndf_dwq_lt2 %>% mutate(plt = map(df_data_filt, plot_samp_effort_subreg))
Since not all of the SubRegions were sampled consistently from 1975-2021, we’ll only keep the SubRegions that contain at least one station that was sampled a majority of the 47 years between 1975 to 2021. To determine an adequate threshold for number of years sampled, we’ll take a look at histograms of the number of years sampled across all stations for each parameter binned by survey.
ndf_dwq_lt_yr_hist <- ndf_dwq_lt2 %>%
mutate(
# Count the number of years sampled for each Station and Parameter
df_count = map(
df_data_filt,
~ distinct(.x, Source, Station, YearAdj) %>%
count(Source, Station, name = "num_years")
),
# Create histograms of number of years sampled for each Parameter
plt_count = map(
df_count,
~ ggplot(.x, aes(num_years)) +
geom_histogram(bins = 30) +
facet_wrap(vars(Source), scales = "free_y")
)
)
After looking at the histograms, we’ll define the long-term or “Core” stations as those that were sampled at least 80% of years between 1975 to 2021, which is 38 years.
ndf_dwq_lt2 <- ndf_dwq_lt2 %>%
mutate(
df_stations = map(
df_data_filt,
~ distinct(.x, Source, Station, YearAdj) %>%
group_by(Source, Station) %>%
mutate(
NumYrs = n(),
StationGrp = case_when(
Source == "EMP" & str_detect(Station, "^C3|^MD10") ~ "Core",
NumYrs >= 38 ~ "Core",
TRUE ~ "Secondary"
)
) %>%
ungroup() %>%
distinct(Source, Station, NumYrs, StationGrp)
)
)
Let’s look at the map of “Core” and “Secondary” stations for water temperature before removing any stations and SubRegions from the analysis. This map excludes EMP’s EZ stations for better visibility of the static stations.
sf_sta_wt_before <- ndf_dwq_lt2$df_data_filt[[1]] %>%
distinct(Source, Station, Latitude, Longitude, SubRegion) %>%
left_join(ndf_dwq_lt2$df_stations[[1]], by = c("Source", "Station")) %>%
st_as_sf(coords = c("Longitude", "Latitude"), crs = 4326, remove = FALSE) %>%
filter(!(Source == "EMP" & str_detect(Station, "^EZ")))
# Convert CRS of Region polygon to WGS84 so that it works with the leaflet map
sf_delta_c_4326 <- st_transform(sf_delta_c, crs = 4326)
# Define color palette for Station Groups
color_pal_sta_grp <- colorFactor(palette = "viridis", domain = sf_sta_wt_before$StationGrp)
# Create map using leaflet
leaflet() %>%
addTiles() %>%
addCircleMarkers(
data = sf_sta_wt_before,
radius = 5,
fillColor = ~color_pal_sta_grp(StationGrp),
fillOpacity = 0.8,
weight = 0.5,
color = "black",
opacity = 1,
label = paste0("Survey: ", sf_sta_wt_before$Source, ", Station: ", sf_sta_wt_before$Station)
) %>%
addPolylines(data = sf_delta_c_4326) %>%
addLegend(
position = "topright",
pal = color_pal_sta_grp,
values = sf_sta_wt_before$StationGrp,
title = "Station Group:"
)
Now, let’s remove the SubRegions that don’t contain at least one “Core” station. We’ll then remove the stations within these under-sampled SubRegions.
ndf_dwq_lt2 <- ndf_dwq_lt2 %>%
mutate(
df_data_filt2 = map2(
df_data_filt, df_stations,
~ left_join(.x, .y, by = c("Source", "Station")) %>%
mutate(StationGrp = factor(StationGrp, levels = c("Secondary", "Core"), ordered = TRUE)) %>%
group_by(SubRegion) %>%
mutate(StationGrp_Max = max(StationGrp)) %>%
ungroup() %>%
filter(StationGrp_Max == "Core") %>%
mutate(StationGrp = as.character(StationGrp))
)
)
Let’s look at the map of “Core” and “Secondary” stations for water temperature after removing any stations and SubRegions from the analysis.
sf_sta_wt_after <- ndf_dwq_lt2$df_data_filt2[[1]] %>%
distinct(Source, Station, Latitude, Longitude, SubRegion, StationGrp) %>%
st_as_sf(coords = c("Longitude", "Latitude"), crs = 4326, remove = FALSE) %>%
filter(!(Source == "EMP" & str_detect(Station, "^EZ")))
# Remove under-sampled SubRegions from the Region polygon
sf_delta_c_4326_filt <- sf_delta_c_4326 %>% filter(SubRegion %in% unique(sf_sta_wt_after$SubRegion))
# Create map using leaflet
leaflet() %>%
addTiles() %>%
addCircleMarkers(
data = sf_sta_wt_after,
radius = 5,
fillColor = ~color_pal_sta_grp(StationGrp),
fillOpacity = 0.8,
weight = 0.5,
color = "black",
opacity = 1,
label = paste0("Survey: ", sf_sta_wt_after$Source, ", Station: ", sf_sta_wt_after$Station)
) %>%
addPolylines(data = sf_delta_c_4326_filt) %>%
addLegend(
position = "topright",
pal = color_pal_sta_grp,
values = sf_sta_wt_after$StationGrp,
title = "Station Group:"
)
Hmmm, now there is a gigantic hole missing in the North-Central Delta. Let’s see how this compares to what we did in the long-term analysis for the 2021 Drought Synthesis report.
# Import Chlorophyll data used in the long-term analysis for the 2021 Drought
# Synthesis report and add lat-long coordinates
df_chla_ds_lt <- read_csv(here("chla_data_stats_LT2.csv")) %>%
mutate(Source = if_else(Source == "USGS-SFBRMP", "USGS_SFBS", Source)) %>%
left_join(
ndf_dwq_lt2$df_data_filt[[1]] %>% distinct(Source, Station, Latitude, Longitude),
by = c("Source", "Station")
)
# Pull out SubRegions for each parameter after using the filtering steps above
ndf_subregion <- ndf_dwq_lt2 %>%
transmute(
Parameter,
df_data_filt2,
df_subreg = map(df_data_filt2, ~ distinct(.x, SubRegion))
) %>%
add_row(
Parameter = c(
"AllSubregions",
"DrtSynthWQ",
"DrtSynthNutr",
"DrtSynthChla"
),
df_data_filt2 = list(
ndf_dwq_lt2$df_data_filt[[1]],
DroughtData::raw_wq_1975_2021,
DroughtData::raw_nutr_1975_2021,
df_chla_ds_lt
),
df_subreg = list(
# Add all possible SubRegions from sf_delta
sf_delta %>% distinct(SubRegion),
# Add SubRegions used in the long-term analysis for the 2021 Drought Synthesis report
DroughtData::raw_wq_1975_2021 %>% distinct(SubRegion),
DroughtData::raw_nutr_1975_2021 %>% distinct(SubRegion),
df_chla_ds_lt %>% distinct(SubRegion)
),
.before = 1
) %>%
mutate(
# Create sf objects for the SubRegions and Stations for each parameter
sf_subreg = map(df_subreg, ~ sf_delta %>% filter(SubRegion %in% unique(.x$SubRegion))),
sf_stations = map(
df_data_filt2,
~ distinct(.x, Source, Station, Latitude, Longitude) %>%
st_as_sf(coords = c("Longitude", "Latitude"), crs = 4326) %>%
st_transform(crs = st_crs(sf_delta))
)
)
# Create a list of character vectors to define the suffixes for the reducing full join
lst_suffix <- map(ndf_subregion$Parameter[-1], ~ c("", paste0("_", .x)))
# Create a custom function for the full join
join_subregions <- function(df1, df2, suff) {
full_join(df1, df2, by = "SubRegion", suffix = suff, keep = TRUE)
}
# Define parameter levels
param_levels <- c(
"DrtSynthWQ",
"Temperature",
"Salinity",
"Secchi",
"DrtSynthNutr",
"DissAmmonia",
"DissNitrateNitrite",
"DissOrthophos",
"DrtSynthChla",
"Chlorophyll"
)
# Run reducing full join and clean up data frame for plotting
df_subregion <- reduce2(ndf_subregion$df_subreg, lst_suffix, join_subregions) %>%
mutate(across(contains("_"), ~ if_else(!is.na(.x), "Yes", "No"))) %>%
rename_with(~ str_extract(.x, "(?<=_).+"), contains("_")) %>%
pivot_longer(cols = -SubRegion, names_to = "Parameter", values_to = "Included") %>%
mutate(
Parameter = factor(Parameter, levels = param_levels),
SubRegion = fct_rev(factor(SubRegion))
)
# Create a function for the SubRegion comparison tile plots
plot_reg_comp <- function(df) {
df %>%
ggplot(aes(x = Parameter, y = SubRegion, fill = Included)) +
geom_tile()
}
# Transform WW_Delta shapefile to the CRS of sf_delta
WW_Delta_26910 <- st_transform(WW_Delta, crs = st_crs(sf_delta))
# Define the bounding box for sf_delta with a 2.5 km buffer
bbox_delta <- st_bbox(st_buffer(sf_delta, 2500))
# Create a function for the SubRegion comparison maps
map_reg_comp <- function(sf_reg, sf_sta) {
ggplot() +
geom_sf(
data = WW_Delta_26910,
color = "lightskyblue",
fill = "lightskyblue",
size = 0.1
) +
geom_sf(
data = sf_reg,
fill = "brown",
alpha = 0.2,
size = 0.5
) +
geom_sf(data = sf_sta) +
coord_sf(
xlim = c(bbox_delta$xmin, bbox_delta$xmax),
ylim = c(bbox_delta$ymin, bbox_delta$ymax)
) +
theme_bw()
}
# Add AllSubregions to param_levels
param_levels2 <- c("AllSubregions", param_levels)
# Create SubRegion comparison maps
ndf_subregion_maps <- ndf_subregion %>%
mutate(
plt_map = map2(sf_subreg, sf_stations, map_reg_comp),
Parameter = factor(Parameter, levels = param_levels2)
) %>%
arrange(Parameter)